
# Show the current working directory. The relative "ancillary_data/..." paths
# read further down in this script are resolved against it.
getwd()

# Remove every object from the current environment before the script runs.
# NOTE(review): this also deletes objects the caller may still need when the
# script is source()'d into an existing session -- confirm this is intended.
rm(list=ls())

library(tidyverse)
library(ggtext)
library(glue)

# Wrap the elements of `x` that match the regular expression `pat` in a bold
# HTML tag carrying an inline font-family / colour style; elements that do not
# match are returned unchanged. Used below to format axis labels that are
# rendered through ggtext::element_markdown().
#
# x      : vector of labels
# pat    : regular expression passed to grepl()
# color  : CSS colour inserted into the style attribute
# family : CSS font family inserted into the style attribute (empty by default)
highlight <- function(x, pat, color = "black", family = "") {
  is_match <- grepl(pat, x)
  ifelse(
    is_match,
    glue("<b style='font-family:{family}; color:{color}'>{x}</b>"),
    x
  )
}


# Fix the random seed so the mock data drawn below are reproducible.
set.seed(1234)

# Read the ISIC4 scheme, keep the distinct ISIC4 groups, stack that list three
# times (one copy per mock region created later) and attach a randomly drawn
# count between 0 and 1000 to every row.
ISIC4 <- readxl::read_excel(
  "ancillary_data/ISIC_schemes.xlsx",
  sheet = "ISIC4"
) %>%
  distinct(ISIC4_group) %>%
  bind_rows(., ., .) %>%
  mutate(count = round(runif(n = n(), min = 0, max = 1000)))

# Build a mock labour survey: one row per ISIC code and region, with a count,
# a noisy weight and the identifying country / region / year columns.
mock_labour <- ISIC4 %>%
  select(ISIC_code = ISIC4_group, count) %>%
  mutate(
    # count scaled by a random factor drawn around 1;
    # in real data, weight is the sum of all individual survey weights.
    weight = count * rnorm(n(), mean = 1, sd = 0.1),
    country_short = "EXA",
    subnational_level = "Regions",
    # first third of the rows -> Capital, second -> South, last -> West
    subnational_name = rep(c("Capital", "South", "West"), each = n() / 3),
    # region code for each region name; anything else stays NA
    subnational_code = case_when(
      subnational_name == "Capital" ~ "EX-C",
      subnational_name == "South" ~ "EX-S",
      subnational_name == "West" ~ "EX-W"
    ),
    year = 2015,
    estimated = 0
  )

# Sanity checks on the mock survey: distributions of ISIC4 groups, regions and
# weights. The results are only printed / plotted, nothing is stored.
# Tabulate how often each ISIC code occurs, then tabulate those frequencies;
# the author's expectation is that each ISIC code appears exactly 3 times.
table(table(mock_labour$ISIC_code))
# Histogram of the mock weights.
hist(mock_labour$weight)
# Number of rows per region.
table(mock_labour$subnational_name)

# Load the RCA data. For the sake of demonstration, a mock RCA file is used.
rca <- readRDS("ancillary_data/mock_rca.rds")

# Industry-group lookup: read the "manu" sheet (the string "NA" is treated as
# missing), keep only the rows coded as ISIC4 groups and drop the coding column.
sectors <- readxl::read_excel(
  path = "ancillary_data/industry_groups.xlsx",
  sheet = "manu",
  na = "NA"
) %>%
  filter(ISIC_coding == "ISIC4_group") %>%
  select(!ISIC_coding)

# Attach the RCA measures and the sector classification to the mock survey.
mock_comp <- mock_labour %>%
  dplyr::left_join(rca, by = c("country_short", "year", "ISIC_code")) %>%
  # replace infinite values in the listed measures by NA
  mutate(across(
    c(rxa_raw, rca_sym, rca_add, rca_net, rca_tba),
    function(measure) ifelse(measure %in% c(-Inf, Inf), NA, measure)
  )) %>%
  dplyr::left_join(sectors, by = "ISIC_code") %>%
  # fix the column order: identifiers, survey columns, sector, then trade data
  dplyr::select(
    country_short, subnational_level, subnational_code, subnational_name,
    year, ISIC_code, count, weight, estimated, sector,
    contains("cntry_"), contains("world_"), contains("ex_"), contains("im_"),
    rxa_raw, rma_raw, contains("rca_")
  )

# National benchmark: weighted mean of each RCA measure over the rows that
# belong to the tradable sectors, per country / subnational level / year.
mock_comp_country <- mock_comp %>%
  # flag the rows of the tradable sectors once instead of repeating the mask
  mutate(
    in_trad = sector %in% c("agri", "mini", "manu_lotech", "manu_hitech", "sr_t")
  ) %>%
  group_by(country_short, subnational_level, year) %>%
  summarise(
    stc_sym_trad_nat = questionr::wtd.mean(
      rca_sym[in_trad], weights = weight[in_trad], na.rm = TRUE
    ),
    stc_add_trad_nat = questionr::wtd.mean(
      rca_add[in_trad], weights = weight[in_trad], na.rm = TRUE
    ),
    stc_net_trad_nat = questionr::wtd.mean(
      rca_net[in_trad], weights = weight[in_trad], na.rm = TRUE
    ),
    stc_tba_trad_nat = questionr::wtd.mean(
      rca_tba[in_trad], weights = weight[in_trad], na.rm = TRUE
    )
  )

# Regional aggregates: per region and year, the raw counts per sector group,
# the weighted share of each sector group and the weighted mean of each RCA
# measure within each sector group.
mock_comp_subnational <- mock_comp %>%
  mutate(
    # row-level membership flags; %in% never returns NA, so these are TRUE/FALSE
    in_agri = sector %in% "agri",
    in_mini = sector %in% "mini",
    in_malt = sector %in% "manu_lotech",
    in_maht = sector %in% "manu_hitech",
    in_sr_n = sector %in% "sr_n",
    in_sr_t = sector %in% "sr_t",
    in_manu = in_malt | in_maht,
    in_good = in_agri | in_mini | in_manu,
    in_serv = in_sr_n | in_sr_t,
    in_trad = in_good | in_sr_t,
    # TRUE where an rca_tba value is available
    has_rca = !is.na(rca_tba),
    # count of the row if it belongs to the sector group, 0 otherwise
    # (single sectors are compared with ==, so a missing sector yields NA here)
    count_agri = count * (sector == "agri"),
    count_mini = count * (sector == "mini"),
    count_manu = count * in_manu,
    count_malt = count * (sector == "manu_lotech"),
    count_maht = count * (sector == "manu_hitech"),
    count_good = count * in_good,
    count_sr_n = count * (sector == "sr_n"),
    count_sr_t = count * (sector == "sr_t"),
    count_serv = count * in_serv,
    count_trad = count * in_trad
  ) %>%
  group_by(country_short, subnational_level, subnational_code, subnational_name, year) %>%
  summarise(
    # number of rows with a non-zero weight and a non-missing rca_tba
    n_sectors = sum(as.numeric(weight != 0) * has_rca),
    estimated = mean(estimated, na.rm = TRUE),
    count = sum(count, na.rm = TRUE),
    # totals of the per-sector counts prepared above
    across(
      c(count_agri, count_mini, count_manu, count_malt, count_maht,
        count_good, count_sr_n, count_sr_t, count_serv, count_trad),
      function(v) sum(v, na.rm = TRUE)
    ),
    weighted_count = sum(weight, na.rm = TRUE),

    # weighted share of rows that belong to the sector group AND have rca_tba.
    # NOTE(review): single-sector shares compare with ==, grouped shares with
    # %in%. For a row with a missing sector but a non-missing rca_tba the former
    # gives NA (presumably dropped by na.rm = TRUE) while the latter gives FALSE
    # (kept in the denominator), so e.g. share_malt + share_maht need not equal
    # share_manu -- confirm whether this difference is intended.
    share_agri = questionr::wtd.mean((sector == "agri") & has_rca, weights = weight, na.rm = TRUE),
    share_mini = questionr::wtd.mean((sector == "mini") & has_rca, weights = weight, na.rm = TRUE),
    share_manu = questionr::wtd.mean(in_manu & has_rca, weights = weight, na.rm = TRUE),
    share_malt = questionr::wtd.mean((sector == "manu_lotech") & has_rca, weights = weight, na.rm = TRUE),
    share_maht = questionr::wtd.mean((sector == "manu_hitech") & has_rca, weights = weight, na.rm = TRUE),
    share_good = questionr::wtd.mean(in_good & has_rca, weights = weight, na.rm = TRUE),
    share_sr_n = questionr::wtd.mean((sector == "sr_n") & has_rca, weights = weight, na.rm = TRUE),
    share_sr_t = questionr::wtd.mean((sector == "sr_t") & has_rca, weights = weight, na.rm = TRUE),
    share_serv = questionr::wtd.mean(in_serv & has_rca, weights = weight, na.rm = TRUE),
    share_trad = questionr::wtd.mean(in_trad & has_rca, weights = weight, na.rm = TRUE),

    # weighted mean RCA over all tradable sectors
    stc_sym_trad_sub = questionr::wtd.mean(rca_sym[in_trad], weights = weight[in_trad], na.rm = TRUE),
    stc_add_trad_sub = questionr::wtd.mean(rca_add[in_trad], weights = weight[in_trad], na.rm = TRUE),
    stc_net_trad_sub = questionr::wtd.mean(rca_net[in_trad], weights = weight[in_trad], na.rm = TRUE),
    stc_tba_trad_sub = questionr::wtd.mean(rca_tba[in_trad], weights = weight[in_trad], na.rm = TRUE),

    # agriculture
    stc_sym_agri_sub = questionr::wtd.mean(rca_sym[in_agri], weights = weight[in_agri], na.rm = TRUE),
    stc_add_agri_sub = questionr::wtd.mean(rca_add[in_agri], weights = weight[in_agri], na.rm = TRUE),
    stc_net_agri_sub = questionr::wtd.mean(rca_net[in_agri], weights = weight[in_agri], na.rm = TRUE),
    stc_tba_agri_sub = questionr::wtd.mean(rca_tba[in_agri], weights = weight[in_agri], na.rm = TRUE),

    # mining
    stc_sym_mini_sub = questionr::wtd.mean(rca_sym[in_mini], weights = weight[in_mini], na.rm = TRUE),
    stc_add_mini_sub = questionr::wtd.mean(rca_add[in_mini], weights = weight[in_mini], na.rm = TRUE),
    stc_net_mini_sub = questionr::wtd.mean(rca_net[in_mini], weights = weight[in_mini], na.rm = TRUE),
    stc_tba_mini_sub = questionr::wtd.mean(rca_tba[in_mini], weights = weight[in_mini], na.rm = TRUE),

    # manufacturing (low-tech and high-tech together)
    stc_sym_manu_sub = questionr::wtd.mean(rca_sym[in_manu], weights = weight[in_manu], na.rm = TRUE),
    stc_add_manu_sub = questionr::wtd.mean(rca_add[in_manu], weights = weight[in_manu], na.rm = TRUE),
    stc_net_manu_sub = questionr::wtd.mean(rca_net[in_manu], weights = weight[in_manu], na.rm = TRUE),
    stc_tba_manu_sub = questionr::wtd.mean(rca_tba[in_manu], weights = weight[in_manu], na.rm = TRUE),

    # low-tech manufacturing
    stc_sym_malt_sub = questionr::wtd.mean(rca_sym[in_malt], weights = weight[in_malt], na.rm = TRUE),
    stc_add_malt_sub = questionr::wtd.mean(rca_add[in_malt], weights = weight[in_malt], na.rm = TRUE),
    stc_net_malt_sub = questionr::wtd.mean(rca_net[in_malt], weights = weight[in_malt], na.rm = TRUE),
    stc_tba_malt_sub = questionr::wtd.mean(rca_tba[in_malt], weights = weight[in_malt], na.rm = TRUE),

    # high-tech manufacturing
    stc_sym_maht_sub = questionr::wtd.mean(rca_sym[in_maht], weights = weight[in_maht], na.rm = TRUE),
    stc_add_maht_sub = questionr::wtd.mean(rca_add[in_maht], weights = weight[in_maht], na.rm = TRUE),
    stc_net_maht_sub = questionr::wtd.mean(rca_net[in_maht], weights = weight[in_maht], na.rm = TRUE),
    stc_tba_maht_sub = questionr::wtd.mean(rca_tba[in_maht], weights = weight[in_maht], na.rm = TRUE),

    # services in the "sr_t" group
    stc_sym_sr_t_sub = questionr::wtd.mean(rca_sym[in_sr_t], weights = weight[in_sr_t], na.rm = TRUE),
    stc_add_sr_t_sub = questionr::wtd.mean(rca_add[in_sr_t], weights = weight[in_sr_t], na.rm = TRUE),
    stc_net_sr_t_sub = questionr::wtd.mean(rca_net[in_sr_t], weights = weight[in_sr_t], na.rm = TRUE),
    stc_tba_sr_t_sub = questionr::wtd.mean(rca_tba[in_sr_t], weights = weight[in_sr_t], na.rm = TRUE)
  )

# Subtract the national STC benchmark from every regional STC value.
# For each sector group and each measure the new column is
#   <regional weighted mean of the sector group> - <national tradable mean>,
# i.e. every sector-specific value is compared with the same national
# all-tradables benchmark of that measure.
mock_comp <- local({
  # natural join on the columns both tables share; groups are dropped right
  # away because the arithmetic below is purely row-wise
  combined <- dplyr::left_join(mock_comp_subnational, mock_comp_country) %>%
    ungroup()

  # sector-major, measure-minor order fixes the order of the new columns
  for (sec in c("trad", "agri", "mini", "manu", "malt", "maht", "sr_t")) {
    for (msr in c("sym", "add", "net", "tba")) {
      regional <- paste0("stc_", msr, "_", sec, "_sub")
      national <- paste0("stc_", msr, "_trad_nat")
      # the all-tradables result carries no sector suffix (e.g. "stc_sym")
      target <- if (sec == "trad") {
        paste0("stc_", msr)
      } else {
        paste0("stc_", msr, "_", sec)
      }
      combined[[target]] <- combined[[regional]] - combined[[national]]
    }
  }

  combined %>%
    # drop the intermediate regional / national columns ...
    dplyr::select(-contains("_sub"), -contains("_nat")) %>%
    # ... and keep identifiers, counts, shares and the STC differences
    dplyr::select(country_short:estimated, count:stc_tba_sr_t)
})

# Plot the symmetric and trade-balance STC per sector and region. Point size
# shows the sector's share; facets are (type + measure) by region.
local({
  # sectors shown on the x axis; labels outside this set (here
  # "Manufacturing") become NA when the factor is built
  sector_levels <- c(
    "Overall", "Agriculture", "Mining",
    "Manu. (low-tech)", "Manu. (high-tech)", "Services"
  )
  measure_codes <- c("sym", "add", "net", "tba")

  # long table of STC values: one row per region x STC column
  stc_long <- mock_comp %>%
    ungroup() %>%
    dplyr::select(subnational_code, subnational_name, contains("stc")) %>%
    gather(key = "competitiveness", value = "value", stc_sym:stc_tba_sr_t) %>%
    mutate(
      # sector label derived from the column name; the first match wins
      sector = factor(
        case_when(
          grepl("malt", competitiveness) ~ "Manu. (low-tech)",
          grepl("maht", competitiveness) ~ "Manu. (high-tech)",
          grepl("manu", competitiveness) ~ "Manufacturing",
          grepl("mi", competitiveness) ~ "Mining",
          grepl("ag", competitiveness) ~ "Agriculture",
          grepl("sr", competitiveness) ~ "Services",
          TRUE ~ "Overall"
        ),
        levels = sector_levels
      ),
      # characters 5-7 of the column name hold the measure code
      measure = factor(substr(competitiveness, 5, 7), levels = measure_codes),
      dst_lab = paste0(subnational_name, "\n(", subnational_code, ")"),
      # must be computed before `measure` is relabelled below
      type = factor(ifelse(measure %in% c("sym", "add"), "Exports", "Trade~ Balance")),
      # plotmath labels, parsed by label_parsed in the facet strips
      measure = factor(
        measure,
        levels = measure_codes,
        labels = c(bquote(STC~ (symmetric)), bquote(STC~ (additive)), bquote(STC~ (net)[OV]), bquote(STC~ (trade~ balance)))
      )
    )

  # long table of sector shares: one row per region x share column
  share_long <- mock_comp %>%
    ungroup() %>%
    dplyr::select(
      subnational_code, subnational_name,
      share_agri, share_mini, share_manu, share_malt, share_maht,
      share_sr_t, share_trad
    ) %>%
    gather(key = "share", value = "value_share", share_agri:share_trad) %>%
    mutate(
      sector = factor(
        case_when(
          grepl("trad", share) ~ "Overall",
          grepl("malt", share) ~ "Manu. (low-tech)",
          grepl("maht", share) ~ "Manu. (high-tech)",
          grepl("manu", share) ~ "Manufacturing",
          grepl("mi", share) ~ "Mining",
          grepl("ag", share) ~ "Agriculture",
          grepl("sr_t", share) ~ "Services"
        ),
        levels = sector_levels
      )
    )

  dplyr::left_join(
    stc_long, share_long,
    by = c("subnational_code", "subnational_name", "sector")
  ) %>%
    # "~ " keeps blanks inside the parsed facet labels
    mutate(dst_lab = gsub(" ", "~ ", dst_lab)) %>%
    # only the trade-balance and symmetric measures are plotted
    filter(grepl("stc_tba|stc_sym", competitiveness)) %>%
    ggplot(aes(x = sector, y = value, size = value_share, colour = subnational_name)) +
    geom_hline(yintercept = 0, lty = "dotted") +
    geom_segment(aes(xend = sector, y = 0, yend = value), alpha = .2) +
    geom_point() +
    ggh4x::facet_nested(
      type + measure ~ dst_lab,
      scales = "free_y",
      nest_line = TRUE,
      labeller = label_parsed
    ) +
    labs(x = "\nSector", y = "Subnational Trade Competitiveness\n") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    scale_shape_manual("", values = c(1, 4, 10)) +
    scale_colour_manual("", values = c("#00BFC4", "#00B4F0", "#C77CFF", "#FF64B0")) +
    # render the "Overall" tick label in bold
    scale_x_discrete(labels = function(x) highlight(x, "Overall", "black")) +
    theme(axis.text.x = element_markdown()) +
    theme(legend.position = "none") +
    coord_cartesian(clip = "off") +
    NULL
})
